######################
##### PREPARATION ####
######################
# Start from a clean workspace, keeping only the `path_to` helper that is
# defined outside this script and used below to build every file path.
rm(list=setdiff(ls(), "path_to"))

# Load survey data
s = readRDS(path_to("wide"))

# Load original coded data: one data frame per "_pref_" file in the
# coded_data folder, stored in a list named by file.
files = list.files(path_to("coded_data"))
files = files[grepl("_pref_", files)]
coded = lapply(setNames(nm = files), function(f) {
  d = read.csv2(path_to("coded_data", f), as.is = TRUE)
  # The first column is treated as the response id whatever its header says
  # (presumably to normalise differing/garbled headers -- TODO confirm).
  colnames(d)[1] = "ResponseId"
  # Remember the source file; later steps separate files by name pattern.
  d$file = f
  if (grepl("conflicts", f)) {
    # In conflict files keep only rows with a response id and an empty
    # `coder` field, and label them as conflict solutions.
    d = d %>% filter(ResponseId != "" & coder == "")
    # rep() instead of a scalar so that a file with no remaining rows does
    # not raise "replacement has 1 row, data has 0".
    d$coder = rep("conflict", nrow(d))
  }
  d
})
# Stack all files; columns missing in a file are filled with NA.
coded = rbind.fill(coded)


######################
##### ILLEGAL CODES? #
######################
# Legal codes (the codebook). A response may carry several codes, separated
# by commas.
legal_codes = c("conseq", "deont", "conseqstill", "deont:principle", "deont:feelgood", "deont:ownpart", "other", "misunderstanding", "junk")

# Delete white space
coded$code_conseq = gsub(" ", "", coded$code_conseq)

# Save original code
coded$old = coded$code_conseq

# Correct typos. Names are the correct codes, values the misspellings that
# are replaced by them.
# NOTE(review): a recode of "endogenous" to "other" was once planned here but
# is not implemented; if that code occurs it is reported in `illegal` below.
# TODO confirm whether the recode is still needed.
rnm = c(
  "conseqstill" = "conseqstilll",
  "deont:principle" = "deont:princicple"
)
for (i in seq_along(rnm)) {
  coded$code_conseq = gsub(rnm[i], names(rnm)[i], coded$code_conseq, perl = TRUE)
}

# Check whether any patterns do not match the codebook
codes = unique(unlist(strsplit(coded$code_conseq, ",")))
illegal = codes[!codes %in% legal_codes]
illegal

# Return problematic cases (rows that contain at least one illegal code)
cases = which(vapply(strsplit(coded$code_conseq, ","), function(x) {
  any(illegal %in% x)
}, logical(1)))
coded[cases, c("ResponseId", "file")]

# Check whether renaming was successful
#coded[coded$old != coded$code_conseq, c("old", "code_conseq")] %>% View

# Bring codes in alphabetical order.
# A missing code must stay NA: pasting it would create the literal string
# "NA", which would then pass as a (bogus) code. An empty string splits into
# character(0) and is pasted back to "".
coded$code_conseq = vapply(strsplit(coded$code_conseq, ","), function(x) {
  if (anyNA(x)) {
    return(NA_character_)
  }
  paste0(x[order(x)], collapse = ",")
}, character(1))

# Replace "" with NA.
# Before doing so, report rows that have no code although the respondent
# wrote something in at least one of the explanation fields. NA text (e.g. a
# column that does not exist in every input file) counts as "no text".
missing_code = is.na(coded$code_conseq) | coded$code_conseq == ""
has_text = rep(FALSE, nrow(coded))
text_cols = c("wtp_explain_diff", "wtp_explain_pos", "wtp_explain_same")
for (col in intersect(text_cols, names(coded))) {
  has_text = has_text | (!is.na(coded[[col]]) & coded[[col]] != "")
}
if (any(missing_code & has_text)) {
  message("Missing code even though text data is available")
}
coded$code_conseq[missing_code] = NA


######################
##### SIMPLIFY CODES #
#######################
# Rename and delete
# Collapse the detailed codes into simplified categories: every deont
# sub-code becomes "deont", "junk" and "misunderstanding" become "other".
# Names are the detailed codes, values the simplified ones; codes that are
# not listed are kept as they are.
simplify_map = c(
  "deont:feelgood" = "deont",
  "deont:principle" = "deont",
  "deont:ownpart" = "deont",
  "junk" = "other",
  "misunderstanding" = "other"
)
# Work on whole comma-separated tokens instead of regex substitution:
# - a pattern such as "deont,deont|deont,deont,deont" always matches its
#   first alternative, so three or more deont codes were reduced to
#   "deont,deont" instead of "deont";
# - token matching cannot rewrite parts of unrelated (e.g. illegal) codes.
# Each simplified category appears at most once per response, so e.g.
# "junk,misunderstanding" becomes "other". Missing codes stay NA.
coded$code_conseq_simple = vapply(strsplit(as.character(coded$code_conseq), ","), function(x) {
  if (anyNA(x)) {
    return(NA_character_)
  }
  hit = x %in% names(simplify_map)
  x[hit] = simplify_map[x[hit]]
  paste0(sort(unique(x)), collapse = ",")
}, character(1))

# Delete unneeded columns
coded$old = NULL

# Export original codes for IRR calculation (all files except those whose
# name contains "conflict" or "robust")
saveRDS(coded[!grepl("conflict|robust", coded$file), ], path_to("codes_conseq_preconflict"))


###########################
##### CONFLICT MANAGEMENT #
###########################
# Extract originals and conflict solution files (distinguished by file name)
# and keep only respondents that are part of the survey data.
is_solution = grepl("conflict", coded$file)
originals = coded[!is_solution, ]
solutions = coded[is_solution, ]
originals = originals %>% filter(ResponseId %in% s$ResponseId)
solutions = solutions %>% filter(ResponseId %in% s$ResponseId)

# Always two codes for originals, only one solution code
# (number of rows per response id, cross-tabulated by source file)
rows_per_id = table(originals$ResponseId)
table(as.integer(rows_per_id[as.character(originals$ResponseId)]),
      originals$file) # All 2
table(table(solutions$ResponseId)) # All 1

# Number of distinct codes per response id, in order of first appearance.
# Grouping once replaces a full scan of `originals` per id and also works
# when `originals` is empty.
ids = unique(originals$ResponseId)
id_index = factor(match(originals$ResponseId, ids), levels = seq_along(ids))
n_codes = vapply(split(originals$code_conseq, id_index), function(x) {
  length(unique(x))
}, integer(1))

# Coders agree
no_conflict = ids[n_codes == 1]

# Coders disagree
conflict = ids[n_codes > 1]

# Conflict solution for each disagreement available
table(conflict %in% solutions$ResponseId) # All TRUE

# Determine open conflicts
open_conflicts = conflict[!conflict %in% solutions$ResponseId]
length(open_conflicts) == 0 # Should be TRUE
if (length(open_conflicts) > 0) {
  # These ids are neither in `no_conflict` nor covered by a solution, so they
  # do not reach the final data set. Make that visible in non-interactive runs.
  message(length(open_conflicts), " conflict(s) without a solution; these responses are dropped from the final data")
}

# Step 1: Take no_conflict cases from the first coder.
# Select from `originals`, not from the full table: the full table still
# contains conflict-solution rows, and depending on file order one of those
# could be the first row kept by !duplicated().
coded = originals %>% filter(ResponseId %in% no_conflict)
coded = coded[!duplicated(coded$ResponseId), ]

# Step 2: Add solved conflicts
coded = rbind(coded, solutions[solutions$ResponseId %in% conflict, ])

# Manually check whether only legal code combinations are used
table(coded$code_conseq_simple)
table(coded$code_conseq)


##############################################################
########## SAVE DATA #########################################
##############################################################
saveRDS(coded, path_to("coded_conseq"))
